10 лет назад · c0ce33a2d6
--- a/app/models/agents/website_agent.rb
+++ b/app/models/agents/website_agent.rb
@@ -31,7 +31,7 @@ module Agents
 
                 
              
 
                       # Scraping HTML and XML
              
 
                 
              
 
                -      When parsing HTML or XML, these sub-hashes specify how each extraction should be done.  The Agent first selects a node set from the document for each extraction key by evaluating either a CSS selector in `css` or an XPath expression in `xpath`.  It then evaluates an XPath expression in `value` on each node in the node set, converting the result into string.  Here's an example:
              
 
                +      When parsing HTML or XML, these sub-hashes specify how each extraction should be done.  The Agent first selects a node set from the document for each extraction key by evaluating either a CSS selector in `css` or an XPath expression in `xpath`.  It then evaluates an XPath expression in `value` (default: `.`) on each node in the node set, converting the result into a string.  Here's an example:
              
 
                 
              
 
                           "extract": {
              
 
                             "url": { "css": "#comic img", "value": "@src" },
              
@@ -39,7 +39,7 @@ module Agents
 
                             "body_text": { "css": "div.main", "value": ".//text()" }
              
 
                           }
              
 
                 
              
 
                -      "@_attr_" is the XPath expression to extract the value of an attribute named _attr_ from a node, and ".//text()" is to extract all the enclosed texts. To extract the innerHTML, use "./node()"; and to extract the outer HTML, use  ".". 
              
 
                +      "@_attr_" is the XPath expression to extract the value of an attribute named _attr_ from a node, and ".//text()" is to extract all the enclosed text. To extract the innerHTML, use "./node()"; and to extract the outer HTML, use ".".
              
 
                 
              
 
                       You can also use [XPath functions](http://www.w3.org/TR/xpath/#section-String-Functions) like `normalize-space` to strip and squeeze whitespace, `substring-after` to extract part of a text, and `translate` to remove commas from a formatted number, etc.  Note that these functions take a string, not a node set, so what you may think would be written as `normalize-space(.//text())` should actually be `normalize-space(.)`.
              
 
                 
              
@@ -373,7 +373,7 @@ module Agents
 
                         case nodes
              
 
                         when Nokogiri::XML::NodeSet
              
 
                           result = nodes.map { |node|
              
 
                -            case value = node.xpath(extraction_details['value'])
              
 
                +            case value = node.xpath(extraction_details['value'] || '.')
              
 
                             when Float
              
 
                               # Node#xpath() returns any numeric value as float;
              
 
                               # convert it to integer as appropriate.